?function_name.library(devtools)
install_github("wenweixiong/MARVEL")
install.packages("MARVEL")
library(MARVEL)
library(data.table) # Fast read-in of input files using fread() function
library(pdp) # Arranging output plots using grid.arrange() function
tran_id column of the relevant metadata. For detailed information of splicing nomenclature, please visit https://wenweixiong.github.io/Splicing_Nomenclature.# Read splice junction counts file
path <- "Data/SJ/"
file <- "SJ.txt"
sj <- as.data.frame(fread(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA"))
sj[50000:50005,1:5]
## coord.intron ERR1562083 ERR1562084 ERR1562085 ERR1562086
## 50000 chr1:20779912:20780418 NA NA NA NA
## 50001 chr1:20779912:20786185 NA 0 NA NA
## 50002 chr1:20779912:20786252 NA NA NA NA
## 50003 chr1:20779912:20786648 1 NA NA 1
## 50004 chr1:20779912:20787194 NA 8 4 NA
## 50005 chr1:20779912:20807151 NA NA NA NA
# Read phenoData (sample metadata)
path <- "Data/SJ/"
file <- "SJ_phenoData.txt"
df.pheno <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
head(df.pheno)
## sample.id cell.type sample.type qc.seq
## 1 ERR1562083 Unknown Single Cell pass
## 2 ERR1562084 iPSC Single Cell pass
## 3 ERR1562085 iPSC Single Cell pass
## 4 ERR1562086 iPSC Single Cell pass
## 5 ERR1562087 iPSC Single Cell pass
## 6 ERR1562088 iPSC Single Cell pass
# Read featureData (splicing metadata)
# SE
path <- "Data/rMATS/SE/"
file <- "SE_featureData.txt"
df.feature.se <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.feature.se[1:5, ]
## tran_id
## 1 chrX:156022314:156022459:+@chrX:156022699:156022834:+@chrX:156023012:156023209
## 2 chrX:154227192:154227360:+@chrX:154228828:154228993:+@chrX:154230548:154230598
## 3 chrX:154190054:154190222:+@chrX:154191688:154191853:+@chrX:154193408:154193458
## 4 chrX:154152940:154153108:+@chrX:154154574:154154739:+@chrX:154156294:154156344
## 5 chrX:152918983:152919081:+@chrX:152920328:152920411:+@chrX:152920707:152920748
## gene_id gene_short_name gene_type
## 1 ENSG00000182484.15 WASH6P transcribed_unprocessed_pseudogene
## 2 ENSG00000166160.9 OPN1MW2 protein_coding
## 3 ENSG00000268221.5 OPN1MW protein_coding
## 4 ENSG00000102076.9 OPN1LW protein_coding
## 5 ENSG00000147394.18 ZNF185 protein_coding
# MXE
path <- "Data/rMATS/MXE/"
file <- "MXE_featureData.txt"
df.feature.mxe <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.feature.mxe[1:5, ]
## tran_id
## 1 chrX:1295427:1295456:+@chrX:1300491:1300626:+@chrX:1303923:1304019:+@chrX:1305446:1305527
## 2 chr19:54874280:54874323:+@chr19:54875330:54875365:+@chr19:54880836:54880913:+@chr19:54885235:54885525
## 3 chr19:35294235:35294290:+@chr19:35295386:35295454:+@chr19:35295455:35295510:+@chr19:35295613:35295981
## 4 chr19:31588192:31588316:+@chr19:31588628:31588680:+@chr19:31590356:31590478:+@chr19:31591213:31591310
## 5 chr19:31588253:31588316:+@chr19:31588628:31588680:+@chr19:31588762:31588948:+@chr19:31589982:31590116
## gene_id gene_short_name gene_type
## 1 ENSG00000198223.16 CSF2RA protein_coding
## 2 ENSG00000186431.19 FCAR protein_coding
## 3 ENSG00000105695.15 MAG protein_coding
## 4 ENSG00000267465.7 AC011525.2 lncRNA
## 5 ENSG00000267465.7 AC011525.2 lncRNA
# RI
path <- "Data/rMATS/RI/"
file <- "RI_featureData.txt"
df.feature.ri <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.feature.ri[1:5, ]
## tran_id gene_id
## 1 chrX:156021999:156022145:+@chrX:156022314:156022459 ENSG00000182484.15
## 2 chrX:156021999:156022145:+@chrX:156022323:156022459 ENSG00000182484.15
## 3 chrX:156022314:156022459:+@chrX:156022699:156022834 ENSG00000182484.15
## 4 chrX:156022323:156022539:+@chrX:156022699:156022834 ENSG00000182484.15
## 5 chrX:152714847:152714928:+@chrX:152715079:152715157 ENSG00000183305.13
## gene_short_name gene_type
## 1 WASH6P transcribed_unprocessed_pseudogene
## 2 WASH6P transcribed_unprocessed_pseudogene
## 3 WASH6P transcribed_unprocessed_pseudogene
## 4 WASH6P transcribed_unprocessed_pseudogene
## 5 MAGEA2B protein_coding
# A5SS
path <- "Data/rMATS/A5SS/"
file <- "A5SS_featureData.txt"
df.feature.a5ss <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.feature.a5ss[1:5, ]
## tran_id
## 1 chrX:156022323:156022459|156022539:+@chrX:156022699:156022834
## 2 chrX:156023012:156023209|156023213:+@chrX:156023302:156023460
## 3 chrX:156025032:156025049|156025100:+@chrX:156025241:156025554
## 4 chrX:152959699:152959896|152959937:+@chrX:152963839:152963949
## 5 chrX:108137804:108137884|108137991:+@chrX:108150152:108150297
## gene_id gene_short_name gene_type
## 1 ENSG00000182484.15 WASH6P transcribed_unprocessed_pseudogene
## 2 ENSG00000182484.15 WASH6P transcribed_unprocessed_pseudogene
## 3 ENSG00000182484.15 WASH6P transcribed_unprocessed_pseudogene
## 4 ENSG00000147394.18 ZNF185 protein_coding
## 5 ENSG00000101844.18 ATG4A protein_coding
# A3SS
path <- "Data/rMATS/A3SS/"
file <- "A3SS_featureData.txt"
df.feature.a3ss <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.feature.a3ss[1:5, ]
## tran_id
## 1 chrX:156021999:156022145:+@chrX:156022314|156022323:156022459
## 2 chrX:152920328:152920411:+@chrX:152920707|152920710:152920748
## 3 chrX:119011815:119011973:+@chrX:119012880|119013039:119013201
## 4 chrX:105906414:105906589:+@chrX:105907177|105908240:105908303
## 5 chrX:48793172:48793297:+@chrX:48793793|48794134:48794311
## gene_id gene_short_name gene_type
## 1 ENSG00000182484.15 WASH6P transcribed_unprocessed_pseudogene
## 2 ENSG00000147394.18 ZNF185 protein_coding
## 3 ENSG00000175556.16 LONRF3 protein_coding
## 4 ENSG00000123572.17 NRK protein_coding
## 5 ENSG00000102145.14 GATA1 protein_coding
# Merge
df.feature.list <- list(df.feature.se, df.feature.mxe, df.feature.ri, df.feature.a5ss, df.feature.a3ss)
names(df.feature.list) <- c("SE", "MXE", "RI", "A5SS", "A3SS")
# Intron coverage
path <- "Data/MARVEL/PSI/RI/"
file <- "Counts_by_Region.txt"
df.intron.counts <- as.data.frame(fread(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA"))
df.intron.counts[1:5,1:5]
## coord.intron ERR1562083 ERR1562084 ERR1562085 ERR1562086
## 1 chr1:13375:13452 0 0 0 0
## 2 chr1:146510:146641 0 150 0 129
## 3 chr1:498457:498683 678 0 0 0
## 4 chr1:764801:765143 5375 4544 6794 1591
## 5 chr1:804967:806385 794 0 0 0
# Read phenoData (sample metadata)
path <- "Data/RSEM/"
file <- "TPM_phenoData.txt"
df.tpm.pheno <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE)
df.tpm.pheno[1:5, ]
## sample.id cell.type sample.type qc.seq
## 1 ERR1562083 Unknown Single Cell pass
## 2 ERR1562084 iPSC Single Cell pass
## 3 ERR1562085 iPSC Single Cell pass
## 4 ERR1562086 iPSC Single Cell pass
## 5 ERR1562087 iPSC Single Cell pass
# Read featureData (gene metadata)
path <- "Data/RSEM/"
file <- "TPM_featureData.txt"
df.tpm.feature <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE)
df.tpm.feature[1:5, ]
## gene_id gene_short_name gene_type
## 1 ENSG00000000003.14 TSPAN6 protein_coding
## 2 ENSG00000000005.6 TNMD protein_coding
## 3 ENSG00000000419.12 DPM1 protein_coding
## 4 ENSG00000000457.14 SCYL3 protein_coding
## 5 ENSG00000000460.17 C1orf112 protein_coding
# Read matrix
path <- "Data/RSEM/"
file <- "TPM.txt"
df.tpm <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE)
df.tpm[1:5,1:5]
## gene_id ERR1562083 ERR1562084 ERR1562085 ERR1562086
## 1 ENSG00000000003.14 343.26 163.45 210.43 190.46
## 2 ENSG00000000005.6 5.69 0.00 0.00 0.00
## 3 ENSG00000000419.12 288.02 155.26 42.49 238.67
## 4 ENSG00000000457.14 1.58 8.71 0.00 1.06
## 5 ENSG00000000460.17 41.23 28.53 100.17 66.43
marvel <- CreateMarvelObject(SpliceJunction=sj,
SplicePheno=df.pheno,
SpliceFeature=df.feature.list,
IntronCounts=df.intron.counts,
GenePheno=df.tpm.pheno,
GeneFeature=df.tpm.feature,
Exp=df.tpm
)
marvel <- CheckAlignment(MarvelObject=marvel, level="SJ")
## [1] "192 samples (cells) identified in SJ phenoData"
## [1] "192 samples (cells) identified in SJ count matrix"
## [1] "192 overlapping samples (cells) identified"
## [1] "phenoData SJ IDs and SJ count matrix column names MATCHED"
## [1] "Additional checks for intron count matrix..."
## [1] "192 samples (cells) identified in SJ phenoData"
## [1] "192 samples (cells) identified in intron count matrix"
## [1] "192 overlapping samples (cells) identified"
## [1] "phenoData SJ IDs and SJ count matrix and intron count column names MATCHED"
# Compute PSI
marvel <- ComputePSI(MarvelObject=marvel,
CoverageThreshold=10,
UnevenCoverageMultiplier=10,
EventType="SE"
)
## [1] "Analysing 51154 splicing events"
## [1] "20509 splicing events validated and quantified"
# Compute PSI
marvel <- ComputePSI(MarvelObject=marvel,
CoverageThreshold=10,
UnevenCoverageMultiplier=10,
EventType="MXE"
)
## [1] "Analysing 4892 splicing events"
## [1] "1279 splicing events validated and quantified"
# Compute PSI
marvel <- ComputePSI(MarvelObject=marvel,
CoverageThreshold=10,
EventType="A5SS"
)
## [1] "Analysing 10464 splicing events"
## [1] "5163 splicing events validated and quantified"
# Compute PSI
marvel <- ComputePSI(MarvelObject=marvel,
CoverageThreshold=10,
EventType="A3SS"
)
## [1] "Analysing 13160 splicing events"
## [1] "5852 splicing events validated and quantified"
# Compute PSI
marvel <- ComputePSI(MarvelObject=marvel,
CoverageThreshold=10,
EventType="RI",
thread=4
)
## [1] "Analysing 11829 splicing events"
## [1] "8295 splicing events validated and quantified"
# SE
path <- "Data/MARVEL/PSI/SE/"
file <- "SE_featureData_Validated.txt"
df.feature.se <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.feature.se[1:5, ]
## tran_id
## 1 chrX:152922173:152922256:+@chrX:152922720:152922809:+@chrX:152928575:152928661
## 2 chrX:152922173:152922256:+@chrX:152922720:152922809:+@chrX:152931672:152931776
## 3 chrX:152922173:152922256:+@chrX:152928575:152928661:+@chrX:152931672:152931776
## 4 chrX:152922720:152922809:+@chrX:152928575:152928661:+@chrX:152931672:152931776
## 5 chrX:152932873:152932971:+@chrX:152936418:152936513:+@chrX:152938074:152938163
## event_type gene_id gene_short_name gene_type
## 1 SE ENSG00000147394.18 ZNF185 protein_coding
## 2 SE ENSG00000147394.18 ZNF185 protein_coding
## 3 SE ENSG00000147394.18 ZNF185 protein_coding
## 4 SE ENSG00000147394.18 ZNF185 protein_coding
## 5 SE ENSG00000147394.18 ZNF185 protein_coding
# MXE
path <- "Data/MARVEL/PSI/MXE/"
file <- "MXE_featureData_Validated.txt"
df.feature.mxe <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.feature.mxe[1:5, ]
## tran_id
## 1 chr10:97737121:97737321:+@chr10:97738477:97738674:+@chr10:97744729:97744915:+@chr10:97748269:97748364
## 2 chr17:7559068:7559297:+@chr17:7559624:7559702:+@chr17:7559851:7559893:+@chr17:7560049:7560167
## 3 chr1:209429181:209429408:+@chr1:209430425:209430500:+@chr1:209431743:209431825:+@chr1:209432204:209432549
## 4 chrX:97310546:97310658:+@chrX:97315690:97315822:+@chrX:97326578:97326722:+@chrX:97348118:97348280
## 5 chrX:97310546:97310658:+@chrX:97315690:97315893:+@chrX:97346806:97346949:+@chrX:97348081:97348280
## event_type gene_id gene_short_name gene_type
## 1 MXE ENSG00000155256.17 ZFYVE27 protein_coding
## 2 MXE ENSG00000161955.16 TNFSF13 protein_coding
## 3 MXE ENSG00000230937.12 MIR205HG lncRNA
## 4 MXE ENSG00000147202.18 DIAPH2 protein_coding
## 5 MXE ENSG00000147202.18 DIAPH2 protein_coding
# RI
path <- "Data/MARVEL/PSI/RI/"
file <- "RI_featureData_Validated.txt"
df.feature.ri <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.feature.ri[1:5, ]
## tran_id event_type
## 1 chrX:152714847:152714928:+@chrX:152715079:152715157 RI
## 2 chrX:149713143:149713255:+@chrX:149714481:149714576 RI
## 3 chrX:48597492:48597614:+@chrX:48597958:48598037 RI
## 4 chr22:37311456:37311527:+@chr22:37312020:37312174 RI
## 5 chr22:29307058:29307186:+@chr22:29308087:29308738 RI
## gene_id gene_short_name gene_type
## 1 ENSG00000183305.13 MAGEA2B protein_coding
## 2 ENSG00000185247.15 MAGEA11 protein_coding
## 3 ENSG00000101940.18 WDR13 protein_coding
## 4 ENSG00000100055.21 CYTH4 protein_coding
## 5 ENSG00000185340.15 GAS2L1 protein_coding
# A5SS
path <- "Data/MARVEL/PSI/A5SS/"
file <- "A5SS_featureData_Validated.txt"
df.feature.a5ss <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.feature.a5ss[1:5, ]
## tran_id event_type
## 1 chrX:156022323:156022459|156022539:+@chrX:156022699:156022834 A5SS
## 2 chrX:156023012:156023209|156023213:+@chrX:156023302:156023460 A5SS
## 3 chrX:49061959:49062058|49062117:+@chrX:49062235:49062325 A5SS
## 4 chr19:44758246:44758413|44758473:+@chr19:44758724:44758841 A5SS
## 5 chr19:42314792:42314851|42314893:+@chr19:42314993:42315077 A5SS
## gene_id gene_short_name gene_type
## 1 ENSG00000182484.15 WASH6P transcribed_unprocessed_pseudogene
## 2 ENSG00000182484.15 WASH6P transcribed_unprocessed_pseudogene
## 3 ENSG00000147144.13 CCDC120 protein_coding
## 4 ENSG00000069399.15 BCL3 protein_coding
## 5 ENSG00000167619.12 TMEM145 protein_coding
# A3SS
path <- "Data/MARVEL/PSI/A3SS/"
file <- "A3SS_featureData_Validated.txt"
df.feature.a3ss <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.feature.a5ss[1:5, ]
## tran_id event_type
## 1 chrX:156022323:156022459|156022539:+@chrX:156022699:156022834 A5SS
## 2 chrX:156023012:156023209|156023213:+@chrX:156023302:156023460 A5SS
## 3 chrX:49061959:49062058|49062117:+@chrX:49062235:49062325 A5SS
## 4 chr19:44758246:44758413|44758473:+@chr19:44758724:44758841 A5SS
## 5 chr19:42314792:42314851|42314893:+@chr19:42314993:42315077 A5SS
## gene_id gene_short_name gene_type
## 1 ENSG00000182484.15 WASH6P transcribed_unprocessed_pseudogene
## 2 ENSG00000182484.15 WASH6P transcribed_unprocessed_pseudogene
## 3 ENSG00000147144.13 CCDC120 protein_coding
## 4 ENSG00000069399.15 BCL3 protein_coding
## 5 ENSG00000167619.12 TMEM145 protein_coding
# Merge
df.feature.list <- list(df.feature.se, df.feature.mxe, df.feature.ri, df.feature.a5ss, df.feature.a3ss)
names(df.feature.list) <- c("SE", "MXE", "RI", "A5SS", "A3SS")
# SE
path <- "Data/MARVEL/PSI/SE/"
file <- "SE.txt"
df.psi.se <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.psi.se[1:5, 1:2]
## tran_id
## 1 chrX:152922173:152922256:+@chrX:152922720:152922809:+@chrX:152928575:152928661
## 2 chrX:152922173:152922256:+@chrX:152922720:152922809:+@chrX:152931672:152931776
## 3 chrX:152922173:152922256:+@chrX:152928575:152928661:+@chrX:152931672:152931776
## 4 chrX:152922720:152922809:+@chrX:152928575:152928661:+@chrX:152931672:152931776
## 5 chrX:152932873:152932971:+@chrX:152936418:152936513:+@chrX:152938074:152938163
## ERR1562083
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
# MXE
path <- "Data/MARVEL/PSI/MXE/"
file <- "MXE.txt"
df.psi.mxe <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.psi.mxe[1:5, 1:5]
## tran_id
## 1 chr10:97737121:97737321:+@chr10:97738477:97738674:+@chr10:97744729:97744915:+@chr10:97748269:97748364
## 2 chr17:7559068:7559297:+@chr17:7559624:7559702:+@chr17:7559851:7559893:+@chr17:7560049:7560167
## 3 chr1:209429181:209429408:+@chr1:209430425:209430500:+@chr1:209431743:209431825:+@chr1:209432204:209432549
## 4 chrX:97310546:97310658:+@chrX:97315690:97315822:+@chrX:97326578:97326722:+@chrX:97348118:97348280
## 5 chrX:97310546:97310658:+@chrX:97315690:97315893:+@chrX:97346806:97346949:+@chrX:97348081:97348280
## ERR1562083 ERR1562084 ERR1562085 ERR1562086
## 1 NA NA NA NA
## 2 NA NA NA NA
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 0.392 NA NA 0
# RI
path <- "Data/MARVEL/PSI/RI/"
file <- "RI.txt"
df.psi.ri <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.psi.ri[1:5, 1:5]
## tran_id ERR1562083 ERR1562084
## 1 chrX:152714847:152714928:+@chrX:152715079:152715157 NA NA
## 2 chrX:149713143:149713255:+@chrX:149714481:149714576 NA NA
## 3 chrX:48597492:48597614:+@chrX:48597958:48598037 NA NA
## 4 chr22:37311456:37311527:+@chr22:37312020:37312174 NA NA
## 5 chr22:29307058:29307186:+@chr22:29308087:29308738 NA NA
## ERR1562085 ERR1562086
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
# A5SS
path <- "Data/MARVEL/PSI/A5SS/"
file <- "A5SS.txt"
df.psi.a5ss <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.psi.a5ss[1:5, 1:5]
## tran_id ERR1562083
## 1 chrX:156022323:156022459|156022539:+@chrX:156022699:156022834 NA
## 2 chrX:156023012:156023209|156023213:+@chrX:156023302:156023460 NA
## 3 chrX:49061959:49062058|49062117:+@chrX:49062235:49062325 NA
## 4 chr19:44758246:44758413|44758473:+@chr19:44758724:44758841 NA
## 5 chr19:42314792:42314851|42314893:+@chr19:42314993:42315077 NA
## ERR1562084 ERR1562085 ERR1562086
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
# A3SS
path <- "Data/MARVEL/PSI/A3SS/"
file <- "A3SS.txt"
df.psi.a3ss <- read.table(paste(path, file, sep=""), sep="\t", header=TRUE, stringsAsFactors=FALSE, na.strings="NA")
df.psi.a3ss[1:5, 1:5]
## tran_id ERR1562083
## 1 chrX:152920328:152920411:+@chrX:152920707|152920710:152920748 NA
## 2 chrX:48597958:48598037:+@chrX:48598717|48598751:48598957 NA
## 3 chrX:48599353:48599462:+@chrX:48599587|48599599:48599717 NA
## 4 chrX:13619163:13619243:+@chrX:13623821|13623824:13623925 NA
## 5 chr19:56158494:56158602:+@chr19:56159624|56159627:56160893 NA
## ERR1562084 ERR1562085 ERR1562086
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
# Merge
df.psi.list <- list(df.psi.se, df.psi.mxe, df.psi.ri, df.psi.a5ss, df.psi.a3ss)
names(df.psi.list) <- c("SE", "MXE", "RI", "A5SS", "A3SS")
marvel <- CreateMarvelObject(SplicePheno=df.pheno,
SpliceFeatureValidated=df.feature.list,
PSI=df.psi.list,
GenePheno=df.tpm.pheno,
GeneFeature=df.tpm.feature,
Exp=df.tpm
)
columns and variables to subset the desired cells for downstream analysis, e.g. cells that passed sequencing QC.# Splicing data
marvel <- SubsetSamples(MarvelObject=marvel,
columns=c("qc.seq", "sample.type", "cell.type"),
variables=list("pass", "Single Cell", c("iPSC", "Endoderm")),
level="splicing"
)
## [1] "192 samples identified in phenoData"
## [1] "136 samples retained in phenoData"
# Gene data
marvel <- SubsetSamples(MarvelObject=marvel,
columns=c("qc.seq", "sample.type", "cell.type"),
variables=list("pass", "Single Cell", c("iPSC", "Endoderm")),
level="gene"
)
## [1] "192 samples identified in phenoData"
## [1] "136 samples retained in phenoData"
# Splicing data
marvel <- CheckAlignment(MarvelObject=marvel, level="splicing")
## [1] "Checking phenoData..."
## [1] "136 samples identified in phenoData"
## [1] "136 samples identified in phenoData "
## [1] "193 samples identified in matrix(s) "
## [1] "136 overlapping samples retained "
## [1] "Checking alignment..."
## [1] "phenoData sample IDs and matrix column names MATCHED for SE"
## [1] "phenoData sample IDs and matrix column names MATCHED for MXE"
## [1] "phenoData sample IDs and matrix column names MATCHED for RI"
## [1] "phenoData sample IDs and matrix column names MATCHED for A5SS"
## [1] "phenoData sample IDs and matrix column names MATCHED for A3SS"
## [1] "Checking for SE"
## [1] "20509 transcripts identified in featureData "
## [1] "20509 transcripts identified in matrix"
## [1] "20509 overlapping transcripts retained"
## [1] "Checking for MXE"
## [1] "1279 transcripts identified in featureData "
## [1] "1279 transcripts identified in matrix"
## [1] "1279 overlapping transcripts retained"
## [1] "Checking for RI"
## [1] "8295 transcripts identified in featureData "
## [1] "8295 transcripts identified in matrix"
## [1] "8295 overlapping transcripts retained"
## [1] "Checking for A5SS"
## [1] "5163 transcripts identified in featureData "
## [1] "5163 transcripts identified in matrix"
## [1] "5163 overlapping transcripts retained"
## [1] "Checking for A3SS"
## [1] "5852 transcripts identified in featureData "
## [1] "5852 transcripts identified in matrix"
## [1] "5852 overlapping transcripts retained"
## [1] "Checking alignment..."
## [1] "featureData transcript IDs and matrix row names MATCHED for SE"
## [1] "featureData transcript IDs and matrix row names MATCHED for MXE"
## [1] "featureData transcript IDs and matrix row names MATCHED for RI"
## [1] "featureData transcript IDs and matrix row names MATCHED for A5SS"
## [1] "featureData transcript IDs and matrix row names MATCHED for A3SS"
# Gene data
marvel <- CheckAlignment(MarvelObject=marvel, level="gene")
## [1] "Checking phenoData..."
## [1] "136 samples identified in phenoData"
## [1] "136 samples identified in phenoData "
## [1] "192 samples identified in matrix(s) "
## [1] "136 overlapping samples retained "
## [1] "Checking alignment..."
## [1] "phenoData sample IDs and matrix column names MATCHED"
## [1] "Checking featureData..."
## [1] "60603 genes identified in featureData"
## [1] "60603 genes identified in featureData "
## [1] "60603 genes identified in matrix(s) "
## [1] "60603 overlapping genes retained "
## [1] "Checking alignment..."
## [1] "phenoData sample IDs and matrix column names MATCHED"
marvel <- TransformExpValues(MarvelObject=marvel)
## [1] "Gene expression values offset by 1 and then log2-transformed. Transformed values below 1 re-coded as 0"
# Perform DE analysis
marvel <- CompareValues(MarvelObject=marvel,
cell.type.columns.1=c("cell.type"),
cell.type.variables.1=list("iPSC"),
cell.type.columns.2=c("cell.type"),
cell.type.variables.2=list("Endoderm"),
n.cells=3,
method="wilcox",
method.adjust="fdr",
level="gene"
)
## [1] "10330 DE genes < 0.10 adjusted p-value"
## [1] "8955 DE genes < 0.05 adjusted p-value"
## [1] "6778 DE genes < 0.01 adjusted p-value"
# Check result table
marvel$DE$Exp$Table[1:5, ]
## gene_id gene_short_name gene_type n.cells.g1 n.cells.g2
## 1 ENSG00000141448.10 GATA6 protein_coding 0 52
## 2 ENSG00000273706.4 LHX1 protein_coding 1 52
## 3 ENSG00000250361.8 GYPB protein_coding 3 52
## 4 ENSG00000266010.2 GATA6-AS1 lncRNA 1 51
## 5 ENSG00000158815.11 FGF17 protein_coding 0 50
## mean.g1 mean.g2 log2fc p.val p.val.adj
## 1 0.00000000 6.051803 6.051803 3.364948e-28 6.476516e-24
## 2 0.02387774 6.346353 6.322476 7.065377e-28 6.799366e-24
## 3 0.06427675 12.180694 12.116417 2.412354e-27 1.547686e-23
## 4 0.01578723 7.528112 7.512325 3.652428e-27 1.757457e-23
## 5 0.00000000 5.835133 5.835133 9.209198e-27 2.743229e-23
# Retrieve DE gene IDs
results.de.exp <- marvel$DE$Exp$Table
gene_ids <- results.de.exp[which(results.de.exp$p.val.adj < 0.10), "gene_id"]
# Reduce dimension
marvel <- RunPCA(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
n.cells=3,
features=gene_ids,
point.size=0.5,
level="gene"
)
# View PCA
marvel$PCA$Exp$Plot
# Retrieve non-DE
results.de.exp <- marvel$DE$Exp$Table
gene_ids <- results.de.exp[which(results.de.exp$p.val.adj >= 0.10), "gene_id"]
# Reduce dimension
marvel <- RunPCA(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
n.cells=3,
features=gene_ids,
point.size=0.5,
level="gene"
)
# View PCA
marvel$PCA$Exp$Plot
# Retrieve non-DE gene_ids
results.de.exp <- marvel$DE$Exp$Table
gene_ids <- results.de.exp[which(results.de.exp$p.val.adj >= 0.10), "gene_id"]
# Retrieve tran_ids
df.feature <- marvel$SpliceFeatureValidate$SE
df.feature <- df.feature [which(df.feature $gene_id %in% gene_ids), ]
tran_ids <- df.feature$tran_id
# Reduce dimension
marvel <- RunPCA(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
n.cells=25,
features=tran_ids,
level="splicing",
event.type="SE",
seed=1
)
# View PCA
marvel$PCA$PSI$Plot
# Retrieve non-DE gene_ids
results.de.exp <- marvel$DE$Exp$Table
gene_ids <- results.de.exp[which(results.de.exp$p.val.adj >= 0.10), "gene_id"]
# Retrieve tran_ids
df.feature <- marvel$SpliceFeatureValidate$MXE
df.feature <- df.feature [which(df.feature$gene_id %in% gene_ids), ]
tran_ids <- df.feature$tran_id
# Reduce dimension
marvel <- RunPCA(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
n.cells=25,
features=tran_ids,
level="splicing",
event.type="MXE",
seed=1
)
# View PCA
marvel$PCA$PSI$Plot
# Retrieve non-DE gene_ids
results.de.exp <- marvel$DE$Exp$Table
gene_ids <- results.de.exp[which(results.de.exp$p.val.adj >= 0.10), "gene_id"]
# Retrieve tran_ids
df.feature <- marvel$SpliceFeatureValidate$A5SS
df.feature <- df.feature[which(df.feature$gene_id %in% gene_ids), ]
tran_ids <- df.feature$tran_id
# Reduce dimension
marvel <- RunPCA(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
n.cells=25,
features=tran_ids,
level="splicing",
event.type="A5SS",
seed=1
)
# View PCA
marvel$PCA$PSI$Plot
# Retrieve non-DE gene_ids
results.de.exp <- marvel$DE$Exp$Table
gene_ids <- results.de.exp[which(results.de.exp$p.val.adj >= 0.10), "gene_id"]
# Retrieve tran_ids
df.feature <- marvel$SpliceFeatureValidate$A3SS
df.feature <- df.feature[which(df.feature$gene_id %in% gene_ids), ]
tran_ids <- df.feature$tran_id
# Reduce dimension
marvel <- RunPCA(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
n.cells=25,
features=tran_ids,
level="splicing",
event.type="A3SS",
seed=1
)
# View PCA
marvel$PCA$PSI$Plot
# Retrieve non-DE gene_ids
results.de.exp <- marvel$DE$Exp$Table
gene_ids <- results.de.exp[which(results.de.exp$p.val.adj >= 0.10), "gene_id"]
# Retrieve tran_ids
df.feature <- marvel$SpliceFeatureValidate$RI
df.feature <- df.feature[which(df.feature$gene_id %in% gene_ids), ]
tran_ids <- df.feature$tran_id
# Reduce dimension
marvel <- RunPCA(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
n.cells=25,
features=tran_ids,
level="splicing",
event.type="RI",
seed=1
)
# View PCA
marvel$PCA$PSI$Plot
# Retrieve non-DE gene_ids
results.de.exp <- marvel$DE$Exp$Table
gene_ids <- results.de.exp[which(results.de.exp$p.val.adj >= 0.10), "gene_id"]
# Retrieve tran_ids
df.feature <- do.call(rbind.data.frame, marvel$SpliceFeatureValidated)
df.feature <- df.feature[which(df.feature$gene_id %in% gene_ids), ]
tran_ids <- df.feature$tran_id
# Reduce dimension
marvel <- RunPCA(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
n.cells=25,
features=tran_ids,
point.size=0.5,
level="splicing",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS"),
seed=1
)
# Check for outliers
marvel$PCA$PSI$Plot
# Assign modality
marvel <- AssignModality(MarvelObject=marvel,
cell.type.columns=c("cell.type"),
cell.type.variables=list("iPSC"),
n.cells=25,
bimodal.adjust=TRUE,
seed=1
)
marvel$Modality$Results[1:5, ]
## tran_id
## 1 chrX:155090784:155090839:+@chrX:155099315:155099389:+@chrX:155116057:155116188
## 2 chrX:155090784:155090839:+@chrX:155116057:155116188:+@chrX:155116711:155116754
## 3 chrX:155033403:155033553:+@chrX:155046509:155046584:+@chrX:155051670:155051801
## 4 chrX:155046509:155046584:+@chrX:155047369:155047391:+@chrX:155051670:155051801
## 5 chrX:154399338:154399396:+@chrX:154399487:154399594:+@chrX:154399803:154399941
## event_type gene_id gene_short_name gene_type n.cells
## 1 SE ENSG00000185515.14 BRCC3 protein_coding 42
## 2 SE ENSG00000185515.14 BRCC3 protein_coding 35
## 3 SE ENSG00000165775.18 FUNDC2 protein_coding 63
## 4 SE ENSG00000165775.18 FUNDC2 protein_coding 67
## 5 SE ENSG00000147403.16 RPL10 protein_coding 83
## alpha beta modality modality.var modality.bimodal.adj
## 1 0.3185248 2.1989103 Bimodal Bimodal Excluded.Dispersed
## 2 186.8246753 1.8538713 Included Included.Primary Included.Primary
## 3 45.0932034 0.7900596 Included Included.Dispersed Included.Dispersed
## 4 2.4134211 233.6340402 Excluded Excluded.Primary Excluded.Primary
## 5 237.6961900 2.2802562 Included Included.Primary Included.Primary
# Proportion of each modality
marvel <- PropModality(MarvelObject=marvel,
modality.column="modality.bimodal.adj",
modality.type="extended",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS"),
across.event.type=FALSE
)
marvel$Modality$Prop$DoughnutChart$Table
## modality freq pct
## 5 Included.Primary 1978 18.6392763
## 4 Included.Dispersed 2997 28.2416133
## 3 Excluded.Primary 2354 22.1824350
## 2 Excluded.Dispersed 2931 27.6196758
## 1 Bimodal 48 0.4523181
## 6 Middle 87 0.8198266
## 7 Multimodal 217 2.0448549
marvel$Modality$Prop$DoughnutChart$Plot
# Proportion of each modality, by event type
marvel <- PropModality(MarvelObject=marvel,
modality.column="modality.bimodal.adj",
modality.type="extended",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS"),
across.event.type=TRUE,
prop.test="chisq",
prop.adj="fdr",
xlabels.size=8
)
marvel$Modality$Prop$BarChart$Plot
marvel$Modality$Prop$BarChart$Stats
## modality p.val p.val.adj
## 1 Included\n(Primary) 7.400294e-98 5.180205e-97
## 2 Included\n(Dispersed) 1.696943e-37 2.969651e-37
## 3 Excluded\n(Primary) 3.800153e-54 1.330053e-53
## 4 Excluded\n(Dispersed) 1.448042e-42 3.378764e-42
## 5 Bimodal 8.463288e-05 8.463288e-05
## 6 Middle 7.405006e-20 1.036701e-19
## 7 Multimodal 1.540602e-12 1.797369e-12
# Assign modality
marvel <- AssignModality(MarvelObject=marvel,
cell.type.columns=c("cell.type"),
cell.type.variables=list("Endoderm"),
n.cells=25,
bimodal.adjust=TRUE,
seed=1
)
marvel$Modality$Results[1:5, ]
## tran_id
## 1 chrX:155033403:155033553:+@chrX:155046509:155046584:+@chrX:155051670:155051801
## 2 chrX:155046509:155046584:+@chrX:155047369:155047391:+@chrX:155051670:155051801
## 3 chrX:154399338:154399396:+@chrX:154399487:154399594:+@chrX:154399803:154399941
## 4 chrX:154399803:154399941:+@chrX:154400464:154400626:+@chrX:154400702:154402339
## 5 chrX:154379197:154379566:+@chrX:154379690:154379794:+@chrX:154379942:154380019
## event_type gene_id gene_short_name gene_type n.cells
## 1 SE ENSG00000165775.18 FUNDC2 protein_coding 28
## 2 SE ENSG00000165775.18 FUNDC2 protein_coding 31
## 3 SE ENSG00000147403.16 RPL10 protein_coding 53
## 4 SE ENSG00000147403.16 RPL10 protein_coding 53
## 5 SE ENSG00000102119.10 EMD protein_coding 30
## alpha beta modality modality.var modality.bimodal.adj
## 1 44.2077643 0.7706464 Included Included.Dispersed Included.Dispersed
## 2 0.2627077 2.1244924 Bimodal Bimodal Excluded.Dispersed
## 3 233.6663960 2.2003259 Included Included.Primary Included.Primary
## 4 39.3616462 0.9308145 Included Included.Primary Included.Primary
## 5 170.2131013 1.6925857 Included Included.Primary Included.Primary
# Proportion of each modality
marvel <- PropModality(MarvelObject=marvel,
modality.column="modality.bimodal.adj",
modality.type="extended",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS"),
across.event.type=FALSE
)
marvel$Modality$Prop$DoughnutChart$Table
## modality freq pct
## 5 Included.Primary 958 22.7499406
## 4 Included.Dispersed 883 20.9688910
## 3 Excluded.Primary 1229 29.1854666
## 2 Excluded.Dispersed 1012 24.0322964
## 1 Bimodal 45 1.0686298
## 6 Middle 23 0.5461886
## 7 Multimodal 61 1.4485870
marvel$Modality$Prop$DoughnutChart$Plot
# Proportion of each modality, by event type
marvel <- PropModality(MarvelObject=marvel,
modality.column="modality.bimodal.adj",
modality.type="extended",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS"),
across.event.type=TRUE,
prop.test="chisq",
prop.adj="fdr",
xlabels.size=8
)
marvel$Modality$Prop$BarChart$Plot
marvel$Modality$Prop$BarChart$Stats
## modality p.val p.val.adj
## 1 Included\n(Primary) 9.270380e-63 6.489266e-62
## 2 Included\n(Dispersed) 4.973427e-08 5.802331e-08
## 3 Excluded\n(Primary) 7.053331e-16 1.645777e-15
## 4 Excluded\n(Dispersed) 3.168902e-23 1.109116e-22
## 5 Bimodal 2.219869e-04 2.219869e-04
## 6 Middle 5.716360e-13 1.000363e-12
## 7 Multimodal 8.200852e-09 1.148119e-08
# DE: Anderson-Darling
marvel <- CompareValues(MarvelObject=marvel,
cell.type.columns.1=c("cell.type"),
cell.type.variables.1=list("iPSC"),
cell.type.columns.2=c("cell.type"),
cell.type.variables.2=list("Endoderm"),
n.cells=25,
method="ad",
method.adjust="fdr",
level="splicing",
event.type=c("SE", "MXE", "RI", "A5SS", "A3SS")
)
## [1] "777 DE splicing events < 0.10 adjusted p-value"
## [1] "646 DE splicing events < 0.05 adjusted p-value"
## [1] "440 DE splicing events < 0.01 adjusted p-value"
marvel$DE$PSI$Table[1:5, ]
## tran_id
## 1 chr13:43059394:43059714:+@chr13:43062190:43062295
## 2 chr15:24962114:24962209:+@chr15:24967029:24967152:+@chr15:24967932:24968082
## 3 chr17:8383254:8382781|8383157:-@chr17:8382143:8382315
## 4 chr10:78037194:78037304:+@chr10:78037439:78037441:+@chr10:78040204:78040225
## 5 chr10:78037194:78037304:+@chr10:78037439|78040204:78040225
## event_type gene_id gene_short_name gene_type n.cells.g1
## 1 RI ENSG00000120675.6 DNAJC15 protein_coding 67
## 2 SE ENSG00000128739.22 SNRPN protein_coding 83
## 3 A5SS ENSG00000161970.15 RPL26 protein_coding 83
## 4 SE ENSG00000138326.20 RPS24 protein_coding 82
## 5 A3SS ENSG00000138326.20 RPS24 protein_coding 83
## n.cells.g2 mean.g1 mean.g2 mean.diff statistic p.val
## 1 53 0.0001407511 0.122010454 0.12186970 66.959 1.4350e-37
## 2 45 0.1003557547 0.003400754 -0.09695500 63.851 8.5431e-36
## 3 53 0.0532707355 0.005447294 -0.04782344 59.893 1.4504e-33
## 4 52 0.1301291792 0.012559445 -0.11756973 57.853 1.9554e-32
## 5 53 0.1346476002 0.014915462 -0.11973214 56.514 1.1045e-31
## p.val.adj
## 1 5.744305e-34
## 2 1.709901e-32
## 3 1.935317e-30
## 4 1.956867e-29
## 5 8.842627e-29
# Plot DE results
marvel <- PlotDEValues(MarvelObject=marvel,
de.p.val.adj=0.10,
level="splicing.distance",
anno=FALSE
)
marvel$DE$PSI$Plot
# Plot DE results: Annotate top events
marvel <- PlotDEValues(MarvelObject=marvel,
de.p.val.adj=0.10,
level="splicing.distance",
anno=TRUE,
n.top=5,
label.size = 2.5
)
marvel$DE$PSI$Plot
# DE: Wilcox
marvel <- CompareValues(MarvelObject=marvel,
cell.type.columns.1=c("cell.type"),
cell.type.variables.1=list("iPSC"),
cell.type.columns.2=c("cell.type"),
cell.type.variables.2=list("Endoderm"),
n.cells=3,
method="wilcox",
method.adjust="fdr",
level="gene"
)
## [1] "10330 DE genes < 0.10 adjusted p-value"
## [1] "8955 DE genes < 0.05 adjusted p-value"
## [1] "6778 DE genes < 0.01 adjusted p-value"
marvel$DE$Exp$Table[1:5, ]
## gene_id gene_short_name gene_type n.cells.g1 n.cells.g2
## 1 ENSG00000141448.10 GATA6 protein_coding 0 52
## 2 ENSG00000273706.4 LHX1 protein_coding 1 52
## 3 ENSG00000250361.8 GYPB protein_coding 3 52
## 4 ENSG00000266010.2 GATA6-AS1 lncRNA 1 51
## 5 ENSG00000158815.11 FGF17 protein_coding 0 50
## mean.g1 mean.g2 log2fc p.val p.val.adj
## 1 0.00000000 6.051803 6.051803 3.364948e-28 6.476516e-24
## 2 0.02387774 6.346353 6.322476 7.065377e-28 6.799366e-24
## 3 0.06427675 12.180694 12.116417 2.412354e-27 1.547686e-23
## 4 0.01578723 7.528112 7.512325 3.652428e-27 1.757457e-23
## 5 0.00000000 5.835133 5.835133 9.209198e-27 2.743229e-23
# Plot DE results
marvel <- PlotDEValues(MarvelObject=marvel,
de.p.val.adj=0.10,
log2fc=1.0,
level="gene",
anno=FALSE
)
marvel$DE$Exp$Plot
# Plot DE results: Annotate top genes
marvel <- PlotDEValues(MarvelObject=marvel,
de.p.val.adj=0.10,
log2fc=1.0,
level="gene",
anno=TRUE,
anno.x.pos=8,
anno.y.pos=-log10(0.10),
anno.x.neg=-7,
anno.y.neg=-log10(0.10)
)
marvel$DE$Exp$Plot
marvel <- ModalityChange(MarvelObject=marvel,
psi.de.sig=0.10,
bimodal.adjust=TRUE,
seed=1,
modality.column="modality.bimodal.adj"
)
marvel$DE$Modality$Plot
marvel$DE$Modality$Plot.Stats
## modality.change freq pct
## 1 Explicit 98 12.61261
## 2 Implicit 147 18.91892
## 3 Restricted 532 68.46847
# Define tran_id to plot
tran_id <- "chr3:129171771:129171634|129171655:-@chr3:129171446:129171538"
# Plot
marvel <- PlotValues(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
feature=tran_id,
xlabels.size=8,
level="splicing",
n.cells=25,
bimodal.adjust=TRUE,
seed=1,
n.cells.jitter.threshold=50,
n.cells.jitter.seed=1
)
# View plot
marvel$adhocPlot$PSI
# Define tran_id to plot
tran_id <- "chr2:37196505:37196520:+@chr2:37198494:37198672:+@chr2:37199704:37199819"
# Plot
marvel <- PlotValues(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
feature=tran_id,
xlabels.size=8,
level="splicing",
n.cells=25,
bimodal.adjust=TRUE,
seed=1,
n.cells.jitter.threshold=50,
n.cells.jitter.seed=1
)
# View plot
marvel$adhocPlot$PSI
# Define tran_id to plot
tran_id <- "chr15:60397943:60398043:-@chr15:60397258:60397338:-@chr15:60386028:60386086"
# Plot
marvel <- PlotValues(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
feature=tran_id,
xlabels.size=8,
level="splicing",
n.cells=25,
bimodal.adjust=TRUE,
seed=1,
n.cells.jitter.threshold=50,
n.cells.jitter.seed=1
)
# View plot
marvel$adhocPlot$PSI
# Categorize gene-splicing mean relationship
marvel <- IsoSwitch(MarvelObject=marvel,
psi.de.sig=0.10,
method="wilcox",
method.adjust="fdr",
gene.de.sig=0.10
)
marvel$DE$Cor$Plot
marvel$DE$Cor$Plot.Stats
## psi.gene.cor freq pct
## 4 Positive 208 26.76963
## 3 Negative 153 19.69112
## 1 Iso-Switch 172 22.13642
## 2 Mixed 244 31.40283
# Define gene_id and tran_id to plot
# gene_id
df.feature <- marvel$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="MRPS18C"), "gene_id"]
# tran_id
tran_id <- "chr4:83456909:83456958:+@chr4:83458346:83458429:+@chr4:83459740:83459797"
# Plot gene expression values
marvel <- PlotValues(MarvelObject=marvel,
cell.type.columns=list(c("cell.type"), c("cell.type")),
cell.type.variables=list(list("iPSC"), list("Endoderm")),
cell.type.labels=c("iPSC", "Endoderm"),
feature=gene_id,
xlabels.size=9,
level="gene"
)
plot.1 <- marvel$adhocPlot$Exp
# Plot PSI values
marvel <- PlotValues(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
feature=tran_id,
xlabels.size=8,
level="splicing",
n.cells=25,
bimodal.adjust=TRUE,
seed=1,
n.cells.jitter.threshold=50,
n.cells.jitter.seed=1
)
plot.2 <- marvel$adhocPlot$PSI
# Arrange and view plots
grid.arrange(plot.1, plot.2, nrow=1)
# Define gene_id and tran_id to plot
# gene_id
df.feature <- marvel$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="RPL13A"), "gene_id"]
# tran_id
tran_id <- "chr19:49489850:49489912:+@chr19:49490232:49490297"
# Plot gene expression values
marvel <- PlotValues(MarvelObject=marvel,
cell.type.columns=list(c("cell.type"), c("cell.type")),
cell.type.variables=list(list("iPSC"), list("Endoderm")),
cell.type.labels=c("iPSC", "Endoderm"),
feature=gene_id,
xlabels.size=9,
level="gene"
)
plot.1 <- marvel$adhocPlot$Exp
# Plot PSI values
marvel <- PlotValues(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
feature=tran_id,
xlabels.size=8,
level="splicing",
n.cells=25,
bimodal.adjust=TRUE,
seed=1,
n.cells.jitter.threshold=50,
n.cells.jitter.seed=1
)
plot.2 <- marvel$adhocPlot$PSI
# Arrange and view plots
grid.arrange(plot.1, plot.2, nrow=1)
# Define gene_id and tran_id to plot
# gene_id
df.feature <- marvel$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="PRRC2C"), "gene_id"]
# tran_id: event 1
tran_id.1 <- "chr1:171512032:171512200:+@chr1:171512995|171513001:171513172"
# tran_id: event 2
tran_id.2 <- "chr1:171584419:171584526|171584541:+@chr1:171587003:171587221"
# Plot gene expression values
marvel <- PlotValues(MarvelObject=marvel,
cell.type.columns=list(c("cell.type"), c("cell.type")),
cell.type.variables=list(list("iPSC"), list("Endoderm")),
cell.type.labels=c("iPSC", "Endoderm"),
feature=gene_id,
xlabels.size=9,
level="gene"
)
plot.1 <- marvel$adhocPlot$Exp
# Plot PSI values: event 1
marvel <- PlotValues(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
feature=tran_id.1,
xlabels.size=8,
level="splicing",
n.cells=25,
bimodal.adjust=TRUE,
seed=1,
n.cells.jitter.threshold=50,
n.cells.jitter.seed=1
)
plot.2 <- marvel$adhocPlot$PSI
# Plot PSI values: event 2
marvel <- PlotValues(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
feature=tran_id.2,
xlabels.size=8,
level="splicing",
n.cells=25,
bimodal.adjust=TRUE,
seed=1,
n.cells.jitter.threshold=50,
n.cells.jitter.seed=1
)
plot.3 <- marvel$adhocPlot$PSI
# Arrange and view plots
grid.arrange(plot.1, plot.2, plot.2, nrow=1)
# Define gene_id and tran_id to plot
# gene_id
df.feature <- marvel$GeneFeature
gene_id <- df.feature[which(df.feature$gene_short_name=="ENAH"), "gene_id"]
# tran_id
tran_id <- "chr1:225507951:225508017:-@chr1:225504991:225505053:-@chr1:225500992:225501070"
# Plot gene expression values
marvel <- PlotValues(MarvelObject=marvel,
cell.type.columns=list(c("cell.type"), c("cell.type")),
cell.type.variables=list(list("iPSC"), list("Endoderm")),
cell.type.labels=c("iPSC", "Endoderm"),
feature=gene_id,
xlabels.size=9,
level="gene"
)
plot.1 <- marvel$adhocPlot$Exp
# Plot PSI values
marvel <- PlotValues(MarvelObject=marvel,
cell.type.columns <- list(c("cell.type"), c("cell.type")),
cell.type.variables <- list(list("iPSC"), list("Endoderm")),
cell.type.labels <- c("iPSC", "Endoderm"),
feature=tran_id,
xlabels.size=8,
level="splicing",
n.cells=25,
bimodal.adjust=TRUE,
seed=1,
n.cells.jitter.threshold=50,
n.cells.jitter.seed=1
)
plot.2 <- marvel$adhocPlot$PSI
# Arrange and view plots
grid.arrange(plot.1, plot.2, nrow=1)
# Geno ontology analysis
marvel <- BioPathways(MarvelObject=marvel,
de.p.val.adj=0.10,
min.gene.set.size=10,
method.adjust="fdr",
remove.ribo=FALSE,
annotate=TRUE
)
## [1] "1465 unique genes identified"
## [1] "496 unique differentially spliced genes identified"
marvel$DE$BioPathways$Table[1:5, ]
## GOBPID Pvalue OddsRatio ExpCount Count Size
## 1 GO:0006412 3.068666e-11 2.809312 68.11809 110 199
## 2 GO:0043043 4.767155e-11 2.774585 68.46039 110 200
## 3 GO:0006413 1.006340e-10 3.759895 36.62631 68 107
## 4 GO:0006518 4.590505e-10 2.556010 73.59492 114 215
## 5 GO:0010468 7.516252e-10 2.068765 162.93572 214 476
## Term p.val.adj
## 1 translation 4.295207e-08
## 2 peptide biosynthetic process 4.295207e-08
## 3 translational initiation 6.044747e-08
## 4 peptide metabolic process 2.068023e-07
## 5 regulation of gene expression 2.708857e-07
## genes.hit
## 1 C1QBP|CALR|CDK4|EEF1A1|EEF1B2|EEF1D|EIF2S3|EIF4A1|EIF4A2|EIF4B|EIF4G2|EIF5A|HNRNPD|ILF3|EIF6|NCL|RPL10A|NPM1|PA2G4|PKM|POLR2G|PPP1CA|RBM4|RPL3|RPL4|RPL7A|RPL8|RPL9|RPL10|RPL12|RPL13|RPL17|RPL18|RPL21|RPL24|RPL26|RPL30|RPL29|RPL31|RPL35A|RPL37|RPL37A|RPL38|RPL39|RPL41|RPL36A|RPLP0|RPLP2|RPS2|RPS6|RPS9|RPS10|RPS11|RPS12|RPS13|RPS15A|RPS16|RPS17|RPS19|RPS20|RPS21|RPS24|RPS25|RPS27A|RPS28|RPS29|SHMT2|SOX4|SRP9|TIA1|TUFM|UBA52|VIM|EIF4H|CNBP|DDX39B|DENR|EIF3D|EIF3F|EIF3G|CDC123|MRPL33|MATR3|RBM8A|EIF1|HNRNPR|RACK1|EIF3M|CELF1|RPL35|RPL13A|RPL36|PABPC1|PPA2|EIF3K|MRPL42|MRPL13|METTL5|MRPL22|CNOT7|MRPS16|MRPS18C|TMA7|TRMT112|ZC3H15|MRPL32|UQCC2|MRPL52|RPL22L1|DPH3
## 2 C1QBP|CALR|CDK4|EEF1A1|EEF1B2|EEF1D|EIF2S3|EIF4A1|EIF4A2|EIF4B|EIF4G2|EIF5A|HNRNPD|ILF3|EIF6|NCL|RPL10A|NPM1|PA2G4|PKM|POLR2G|PPP1CA|RBM4|RPL3|RPL4|RPL7A|RPL8|RPL9|RPL10|RPL12|RPL13|RPL17|RPL18|RPL21|RPL24|RPL26|RPL30|RPL29|RPL31|RPL35A|RPL37|RPL37A|RPL38|RPL39|RPL41|RPL36A|RPLP0|RPLP2|RPS2|RPS6|RPS9|RPS10|RPS11|RPS12|RPS13|RPS15A|RPS16|RPS17|RPS19|RPS20|RPS21|RPS24|RPS25|RPS27A|RPS28|RPS29|SHMT2|SOX4|SRP9|TIA1|TUFM|UBA52|VIM|EIF4H|CNBP|DDX39B|DENR|EIF3D|EIF3F|EIF3G|CDC123|MRPL33|MATR3|RBM8A|EIF1|HNRNPR|RACK1|EIF3M|CELF1|RPL35|RPL13A|RPL36|PABPC1|PPA2|EIF3K|MRPL42|MRPL13|METTL5|MRPL22|CNOT7|MRPS16|MRPS18C|TMA7|TRMT112|ZC3H15|MRPL32|UQCC2|MRPL52|RPL22L1|DPH3
## 3 EIF2S3|EIF4A1|EIF4A2|EIF4B|EIF4G2|EIF6|RPL10A|NPM1|POLR2G|PPP1CA|RBM4|RPL3|RPL4|RPL7A|RPL8|RPL9|RPL10|RPL12|RPL13|RPL17|RPL18|RPL21|RPL24|RPL26|RPL30|RPL29|RPL31|RPL35A|RPL37|RPL37A|RPL38|RPL39|RPL41|RPL36A|RPLP0|RPLP2|RPS2|RPS6|RPS9|RPS10|RPS11|RPS12|RPS13|RPS15A|RPS16|RPS17|RPS19|RPS20|RPS21|RPS24|RPS25|RPS27A|RPS28|RPS29|UBA52|EIF4H|DENR|EIF3D|EIF3F|EIF3G|CDC123|EIF1|EIF3M|RPL35|RPL13A|RPL36|PABPC1|EIF3K
## 4 C1QBP|CALR|CDK4|EEF1A1|EEF1B2|EEF1D|EIF2S3|EIF4A1|EIF4A2|EIF4B|EIF4G2|EIF5A|GSTP1|HNRNPD|IDH1|ILF3|EIF6|NCL|RPL10A|NPM1|PA2G4|PKM|POLR2G|PPP1CA|RBM4|RPL3|RPL4|RPL7A|RPL8|RPL9|RPL10|RPL12|RPL13|RPL17|RPL18|RPL21|RPL24|RPL26|RPL30|RPL29|RPL31|RPL35A|RPL37|RPL37A|RPL38|RPL39|RPL41|RPL36A|RPLP0|RPLP2|RPS2|RPS6|RPS9|RPS10|RPS11|RPS12|RPS13|RPS15A|RPS16|RPS17|RPS19|RPS20|RPS21|RPS24|RPS25|RPS27A|RPS28|RPS29|SHMT2|SOX4|SRP9|TIA1|TUFM|UBA52|VIM|EIF4H|CNBP|DDX39B|PICALM|DENR|EIF3D|EIF3F|EIF3G|CDC123|MRPL33|MATR3|RBM8A|EIF1|HNRNPR|RACK1|EIF3M|CELF1|RPL35|SEC11A|RPL13A|RPL36|PABPC1|PPA2|EIF3K|MRPL42|MRPL13|METTL5|MRPL22|CNOT7|MRPS16|MRPS18C|TMA7|TRMT112|ZC3H15|MRPL32|UQCC2|MRPL52|RPL22L1|DPH3
## 5 ACTB|ACTG1|APEX1|ATRX|C1QBP|CALR|CDK4|CHD2|CTNNB1|DDX5|EIF2S3|EIF4A2|EIF4B|EIF4G2|EIF5A|EWSR1|FOS|XRCC6|NR6A1|GJA1|GSTP1|HINT1|HMGB1|HMGB2|HMGN1|HMGA1|HNRNPA1|HNRNPA2B1|HNRNPC|HNRNPD|HNRNPH1|HNRNPH3|HNRNPK|ILF3|EIF6|TNPO1|KRAS|MDK|HNRNPM|NCL|RPL10A|NEDD8|NME1|NME2|NONO|NPM1|PA2G4|PCBP2|PFN1|PHB|SERPINE2|PKM|POLR2F|POLR2G|POLR2H|POLR2I|PPP1CA|PSMA1|PSMA2|PSMA5|PSMA7|PSMB3|PSMB5|PSMC6|PSMD2|PSMD4|PSMD11|PSMD13|RBM4|RPL3|RPL4|RPL7A|RPL8|RPL9|RPL10|RPL12|RPL13|RPL17|RPL18|RPL21|RPL24|RPL26|RPL30|RPL29|RPL31|RPL35A|RPL37|RPL37A|RPL38|RPL39|RPL41|RPL36A|RPLP0|RPLP2|RPS2|RPS6|RPS9|RPS10|RPS11|RPS12|RPS13|RPS15A|RPS16|RPS17|RPS19|RPS20|RPS21|RPS24|RPS25|RPS27A|RPS28|RPS29|SET|SRSF2|SRSF3|SRSF5|SRSF7|TRA2B|SHC1|SHMT2|SOX4|SRP9|SRPK1|SSB|SUPT4H1|TAF9|ELOB|TMBIM6|TIA1|TSPAN6|TMSB4X|UBA52|UBB|UBC|UBE2D3|UBE2I|UBE2V1|VIM|EIF4H|YWHAB|YWHAZ|SF1|CNBP|ZNF121|ZNF207|CSDE1|DDX39B|MLF2|USP9X|PICALM|EIF3D|EDF1|RIOK3|CDC123|FUBP1|BUD31|LRRFIP1|HMGN3|TMEM59|RBM39|MORF4L2|BCLAF1|MATR3|RBM8A|DMTF1|NUTF2|EIF1|HNRNPR|SAP18|RACK1|C1D|MAD2L2|LAMTOR5|CELF1|KDM5B|SRSF10|PAPOLA|SUB1|MORF4L1|CPSF6|RPL35|PHB2|EXOSC8|RPL13A|RBFOX2|RPL36|SERBP1|PABPC1|SNX5|EIF3K|MRPL13|METTL5|CNOT7|ING4|TRMT112|LSM7|SLC38A2|MAGOHB|LAPTM4B|SUPT20H|GPBP1|TAF1D|ICE2|ZC3H14|PHF6|UQCC2|DPY30|CHURC1|COMMD6|ZNF384|CENPV|DPH3|JPX|STMP1
# Plot selected pathways
marvel <- BioPathways.Plot(MarvelObject=marvel,
go.terms=marvel$DE$BioPathways$Table$Term[c(1:10)],
y.label.size=10
)
marvel$DE$BioPathways$Plot
# Read GTF file
path <- "Data/GTF/"
file <- "gencode.v31.annotation.gtf"
gtf <- as.data.frame(fread(paste(path, file, sep=""), header=FALSE, stringsAsFactors=FALSE))
# Parse GTF
marvel <- ParseGTF(MarvelObject=marvel,
gtf=gtf
)
## [1] "Parsing attribute column..."
## [1] "Retrieving gene_id..."
## [1] "Retrieving transcript_id..."
## [1] "Retrieving transcript_type..."
# Find PTC
marvel <- FindPTC(MarvelObject=marvel,
psi.de.sig=0.10,
psi.de.diff=0.05
)
## [1] "15 SE +ve strand identified"
## [1] "13 SE -ve strand identified"
## [1] "22 RI +ve strand identified"
## [1] "13 RI -ve strand identified"
## [1] "4 A5SS +ve strand identified"
## [1] "9 A5SS -ve strand identified"
## [1] "11 A3SS +ve strand identified"
## [1] "14 A3SS -ve strand identified"
# Tabulate proportion of transcripts with PTC
# Show novel SJ and non-CDS transcripts
marvel <- PropPTC(MarvelObject=marvel,
xlabels.size=8,
show.NovelSJ.NoCDS=TRUE,
prop.test="chisq"
)
marvel$NMD$PTC.Prop$Plot
marvel$NMD$PTC.Prop$Plot.Stats
## event_type Novel SJ No CDS No PTC PTC pval
## SE SE 2 (2.9%) 35 (51.5%) 24 (35.3%) 7 (10.3%) 2.86390276997011e-07
## RI RI 16 (16%) 63 (63%) 7 (7%) 14 (14%)
## A5SS A5SS 3 (12.5%) 12 (50%) 8 (33.3%) 1 (4.2%)
## A3SS A3SS 0 (0%) 44 (50%) 21 (23.9%) 23 (26.1%)
# Do not show novel SJ and non-CDS transcripts
marvel <- PropPTC(MarvelObject=marvel,
xlabels.size=8,
show.NovelSJ.NoCDS=FALSE,
prop.test="chisq"
)
marvel$NMD$PTC.Prop$Plot
marvel$NMD$PTC.Prop$Plot.Stats
## event_type No PTC PTC pval
## SE SE 24 (77.4%) 7 (22.6%) 0.00153208539810952
## RI RI 7 (33.3%) 14 (66.7%)
## A5SS A5SS 8 (88.9%) 1 (11.1%)
## A3SS A3SS 21 (47.7%) 23 (52.3%)
# Investigate relationship between log2fc gene expression and NMD
marvel <- CompareExpr(MarvelObject=marvel,
xlabels.size=8
)
marvel$NMD$NMD.Expr$Plot
marvel$NMD$NMD.Expr$Plot.Stats
## event_type nmd.null.cells nmd.cells nmd.null.log2fc.mean nmd.log2fc.mean
## 1 SE 25 2 -0.5137446 -0.2474660
## 2 RI 27 6 -0.2576200 -1.4061163
## 3 A5SS 11 1 -0.9644353 0.1721040
## 4 A3SS 14 10 -1.0461200 -0.8342298
## p.val
## 1 0.962962963
## 2 0.008201754
## 3 0.166666667
## 4 0.796093932
# Plot NMD genes on gene expression volcano plot
marvel <- AnnoVolcanoPlot(MarvelObject=marvel,
anno=FALSE
)
marvel$NMD$AnnoVolcanoPlot$Plot
# Plot NMD genes on gene expression volcano plot: Annotate selected genes
marvel <- AnnoVolcanoPlot(MarvelObject=marvel,
anno=TRUE,
gene.label.x.below=0,
gene.label.y.above=5,
gene.label.size=2.5
)
marvel$NMD$AnnoVolcanoPlot$Plot